Esta página contiene el código para generar análisis de redes personales (ego networks) en Twitter.
library(rtweet)
source("createTokens.R") ## keys y tokens privados
source("rtweet_functions.R") ## funciones para trabajar con múltiples tokens
library(tidyverse)
library(igraph)
library(tidygraph)
library(ggraph)
library(ggwordcloud)
library(tidytext)
# Make theme_custom() the default ggplot2 theme for every plot on this page.
theme_set(theme_custom())
El primer paso consiste en escoger un usuario focal (o “ego”) a partir del cual construimos una red personal.
# Focal user ("ego") whose personal network is analysed on this page.
ego <- "sergemont"

# Profile metadata of the ego; one entry of `token` is drawn at random
# for the request.
ego_info <- lookup_users(users = ego, token = sample(token, 1))
ego_info[["followers_count"]]
## [1] 8673
Nombre: Sergio Montero
Usuario: sergemont
Seguidores: 8673
Amigos: 2419
Se unió a Twitter en 2012-09-09 14:19:18
Este análisis está dividido en tres partes.
Cada una de estas tres dimensiones corresponde a flujos de interacción diferentes. La primera consiste de los usuarios que reciben información de sergemont, la segunda son los usuarios que generan la información recibida por sergemont, y la tercera consiste en los usuarios donde el flujo de información es recíproco.
Este código es de acceso libre excepto por los keys y tokens privados que se consiguen abriendo una cuenta de desarrollador en https://developer.twitter.com/
El siguiente código extrae la lista de seguidores de sergemont (cada uno identificado con un user_id).
# Request as many follower ids as the follower count reported in ego_info.
ego_followers <- get_followers(
  ego,
  n = ego_info$followers_count,
  token = sample(token, 1)
)
ego_followers
## # A tibble: 8,673 x 1
## user_id
## <chr>
## 1 120180081
## 2 68043845
## 3 1330931515224715265
## 4 94231765
## 5 122851379
## 6 803399965
## 7 1019674215769726976
## 8 192409907
## 9 256959302
## 10 1326849155243839490
## # … with 8,663 more rows
Este user_id es exclusivo a cada cuenta, incluso cuando el usuario decide cambiar su nombre.
El siguiente código crea una carpeta llamada *_friends_of_followers/ donde queda archivada la lista de amigos (las cuentas seguidas) de cada uno de estos usuarios.
Dependiendo del número de usuarios y el número de Tokens, esto puede llegar a demorarse varias horas (o incluso días).
# Folder that caches one .rds file per follower, named after the user_id.
outfolder <- paste0(ego, "_friends_of_followers/")
if (!dir.exists(outfolder)) dir.create(outfolder)

# Users already processed are recognised by their file name. The extension
# is stripped with an escaped, end-anchored pattern; the previous ".rds"
# pattern treated the dot as "any character" and was not tied to the end
# of the name.
users_done <- str_remove(dir(outfolder), "\\.rds$")
users_left <- setdiff(ego_followers$user_id, users_done)

# Process the pending users one at a time so the job can be interrupted and
# resumed: everything already written to outfolder is skipped on the next run.
while (length(users_left) > 0) {
  new_user <- users_left[[1]]
  # try() keeps the loop alive when a request fails; in that case the
  # "try-error" object is what gets written to disk for this user.
  friends_of_user <- try(multi_get_friends(new_user, token))
  file_name <- str_glue("{outfolder}{new_user}.rds")
  write_rds(friends_of_user, file_name, compress = "gz")
  # Drop the user that was just processed from the pending list.
  users_left <- setdiff(users_left, new_user)
}
Para algunos usuarios esta información es imposible de conseguir porque son cuentas protegidas.
En este caso, no se puede obtener información sobre el 7.5% de los seguidores de sergemont.
Para construir la red, tomamos toda la lista de usuarios y sus amigos y los organizamos en dos columnas, donde cada fila indica un usuario (from) siguiendo a otro usuario (to).
# Read every cached result found in outfolder (one list element per file).
edge_list <- list.files(outfolder, full.names = TRUE) %>%
  map(read_rds)

# Flag the elements that are "try-error" objects (failed downloads) and drop
# them with a logical mask. The mask is computed here from the files that
# were just read, and, unlike `edge_list[-error_index]`, it keeps every
# element when there are no failures (a zero-length negative index selects
# nothing).
is_error <- map_lgl(edge_list, ~ inherits(.x, "try-error"))
error_index <- which(is_error)
edge_list <- edge_list[!is_error] %>%
  bind_rows()
edge_list
## # A tibble: 12,073,379 x 2
## from to
## <chr> <chr>
## 1 1000056137255825410 1325845228398342144
## 2 1000056137255825410 28221296
## 3 1000056137255825410 707384515979706368
## 4 1000056137255825410 87697296
## 5 1000056137255825410 252589325
## 6 1000056137255825410 903239720
## 7 1000056137255825410 189385477
## 8 1000056137255825410 563225180
## 9 1000056137255825410 710121377391628292
## 10 1000056137255825410 1472812134
## # … with 12,073,369 more rows
Aquí hay 12,073,379 conexiones. Sin embargo, aquí están incluidas conexiones con usuarios más allá de los que siguen a sergemont.
# Profile metadata for every follower. A single token is drawn at random,
# as in the other requests on this page; the previous call had the closing
# parenthesis misplaced (`sample(token), 1`), which shuffled the whole token
# list and passed `1` to lookup_users() as an extra positional argument.
ego_followers_info <- lookup_users(
  ego_followers$user_id,
  token = sample(token, 1)
)
# Cache the result on disk so later steps can read it back.
write_rds(ego_followers_info, paste0(ego, "_follower_info.rds"), compress = "gz")
También podemos conseguir metadatos sobre cada usuario.
# Reload the cached follower metadata, drop protected accounts and keep the
# descriptive columns plus the *_count columns (minus the quote, retweet,
# reply and favourite counters).
ego_followers_info <- paste0(ego, "_follower_info.rds") %>%
  read_rds() %>%
  filter(!protected) %>%
  select(
    user_id, screen_name, lang, name, location, description,
    ends_with("count"), -starts_with("quote"),
    -starts_with("retweet"), -reply_count,
    -starts_with("fav")
  ) %>%
  # After this step `name` holds the user_id and the display name moves to
  # `user_name`.
  rename(name = user_id, user_name = name)

# Named vector: names are user ids, values are screen names.
id_dict <- deframe(select(ego_followers_info, name, screen_name))
Por ejemplo, esta es la información que corresponde a los seguidores de sergemont con mayor número de seguidores.
# Followers sorted from most to fewest followers of their own.
ego_followers_info %>%
  select(screen_name, description, location, followers_count, friends_count) %>%
  arrange(desc(followers_count))
## # A tibble: 8,009 x 5
## screen_name description location followers_count friends_count
## <chr> <chr> <chr> <int> <int>
## 1 miguelhotero "Presidente Editor Di… "El Nacion… 1804453 1064718
## 2 BogotaET "Información del acon… "Bogotá" 1121542 1444
## 3 Rmayorga "Periodista univallun… "ÜT: 4.663… 597097 2706
## 4 AngelicaLoz… "Ciudadana, senadora … "" 464197 16375
## 5 ClaMoralesM "Periodista. \nColumn… "Armenia" 414681 2517
## 6 VanessaRosa… "Escritora • Crítica … "Bogotá, C… 299383 1565
## 7 mapatilla "Escribo punto al fin… "Colombia" 253829 1417
## 8 VivaAirCol "Somos los más puntua… "Colombia" 230489 38883
## 9 JuanSheput "Político e ingeniero… "Lima, Per… 196957 2614
## 10 Uniandes "Universidad de los A… "Bogotá, C… 140972 162
## # … with 7,999 more rows
Finalmente, nos interesa la red personal de seguidores de sergemont, por lo cual eliminamos las conexiones con usuarios que se encuentran por fuera de sus 8673 seguidores.
# Keep only the edges whose two endpoints both appear in ego_followers_info
# (its `name` column holds the user_id).
edge_list <- filter(
  edge_list,
  to %in% ego_followers_info$name,
  from %in% ego_followers_info$name
)
edge_list
## # A tibble: 437,424 x 2
## from to
## <chr> <chr>
## 1 1000056137255825410 15432179
## 2 1000056137255825410 848923514119884800
## 3 1000056137255825410 2473739315
## 4 1000056137255825410 142240353
## 5 1000056137255825410 397366316
## 6 1000224690491871232 82742830
## 7 1000224690491871232 1104834306
## 8 1000224690491871232 1148049055341326336
## 9 1000224690491871232 167394234
## 10 1000224690491871232 37193785
## # … with 437,414 more rows
La red personal de seguidores de sergemont que pudimos reconstruir tiene 8009 usuarios con 437424 conexiones.
# Turn the edge list into a tbl_graph, attach the follower metadata to the
# nodes, and relabel the nodes: `name` becomes the screen name and the
# original id is kept as `user_id`.
ego_network <- tidygraph::as_tbl_graph(edge_list) %>%
  left_join(ego_followers_info) %>%
  rename(name = screen_name, user_id = name) %>%
  select(name, everything())
ego_network
## # A tbl_graph: 7857 nodes and 437424 edges
## #
## # A directed simple graph with 1 component
## #
## # Node Data: 7,857 x 10 (active)
## name user_id lang user_name location description followers_count
## <chr> <chr> <chr> <chr> <chr> <chr> <int>
## 1 Mago… 100005… es Magove "" "" 101
## 2 Juan… 100022… es JuanitoM… "Bogotá… "" 69
## 3 Vale… 100022… es Valeria "Colomb… "Derecho. … 134
## 4 juli… 100037… es J U L I … "Bogotá… "" 2536
## 5 Gafa… 100044… es Jesús Da… "Monter… "Política-… 57
## 6 susa… 100047… pt Susana Y… "" "" 33
## # … with 7,851 more rows, and 3 more variables: friends_count <int>,
## # listed_count <int>, statuses_count <int>
## #
## # Edge Data: 437,424 x 2
## from to
## <int> <int>
## 1 1 2304
## 2 1 7122
## 3 1 3592
## # … with 437,421 more rows
## Descriptive statistics
# Add one node column per centrality measure.
ego_network <- mutate(
  ego_network,
  out_degree = centrality_degree(mode = "out"),
  in_degree = centrality_degree(mode = "in"),
  betweenness = centrality_betweenness(directed = TRUE),
  authority_score = centrality_authority(),
  eigen_centrality = centrality_eigen(directed = TRUE)
)
ego_network
## # A tbl_graph: 7857 nodes and 437424 edges
## #
## # A directed simple graph with 1 component
## #
## # Node Data: 7,857 x 15 (active)
## name user_id lang user_name location description followers_count
## <chr> <chr> <chr> <chr> <chr> <chr> <int>
## 1 Mago… 100005… es Magove "" "" 101
## 2 Juan… 100022… es JuanitoM… "Bogotá… "" 69
## 3 Vale… 100022… es Valeria "Colomb… "Derecho. … 134
## 4 juli… 100037… es J U L I … "Bogotá… "" 2536
## 5 Gafa… 100044… es Jesús Da… "Monter… "Política-… 57
## 6 susa… 100047… pt Susana Y… "" "" 33
## # … with 7,851 more rows, and 8 more variables: friends_count <int>,
## # listed_count <int>, statuses_count <int>, out_degree <dbl>,
## # in_degree <dbl>, betweenness <dbl>, authority_score <dbl>,
## # eigen_centrality <dbl>
## #
## # Edge Data: 437,424 x 2
## from to
## <int> <int>
## 1 1 2304
## 2 1 7122
## 3 1 3592
## # … with 437,421 more rows
La siguiente gráfica muestra la influencia de cada usuario en Twitter (eje horizontal) vs la influencia de cada usuario dentro de la red personal de seguidores (eje vertical)
# Scatter plot: followers on Twitter (x) against in-degree inside the
# network (y).
# (An optional `filter(in_degree > 5)` step is left disabled.)
ggplot(as_tibble(ego_network), aes(x = followers_count, y = in_degree)) +
  geom_point()

# Same plot, labelling the nodes ranked in the top 10 on either axis.
ego_network %>%
  as_tibble() %>%
  mutate(
    label_name = ifelse(
      rank(-followers_count) <= 10 | rank(-in_degree) <= 10,
      name,
      NA_character_
    )
  ) %>%
  ggplot(aes(x = followers_count, y = in_degree)) +
  geom_point() +
  ggrepel::geom_label_repel(aes(label = label_name), size = 3)
Clusters
set.seed(123)
# Walktrap community detection with random walks of length 7.
clusters <- igraph::cluster_walktrap(ego_network, steps = 7)

# One row per node with its cluster label; clusters with fewer than 10
# members are discarded.
cluster_df <- tibble(
  cluster = factor(clusters$membership),
  name = clusters$names
) %>%
  add_count(cluster) %>%
  filter(n >= 10) %>%
  select(-n)

# Nodes of discarded clusters end up with cluster = NA after the join.
ego_network <- left_join(ego_network, cluster_df)

# Word cloud, one panel per cluster, showing the 30 nodes with the highest
# authority score in each cluster.
ego_network %>%
  as_tibble() %>%
  filter(!is.na(cluster)) %>%
  arrange(desc(in_degree)) %>%
  group_by(cluster) %>%
  filter(rank(-authority_score) <= 30) %>%
  ggplot(aes(label = name, size = log(in_degree), color = in_degree)) +
  geom_text_wordcloud_area(family = "Avenir Next Condensed") +
  facet_wrap(~cluster) +
  labs(title = "Seguidores prominentes en cada cluster") +
  scale_color_gradient(low = "grey", high = "purple")
Tamaño de cada cluster:
# Number of nodes per cluster (NA = nodes without a retained cluster).
count(as_tibble(ego_network), cluster)
## # A tibble: 9 x 2
## cluster n
## <fct> <int>
## 1 1 380
## 2 2 162
## 3 3 2181
## 4 4 249
## 5 5 311
## 6 6 3941
## 7 7 338
## 8 11 129
## 9 <NA> 166
¿Quiénes son los usuarios que funcionan como “puentes”?
# Nodes sorted by decreasing betweenness.
as_tibble(ego_network) %>%
  arrange(desc(betweenness)) %>%
  select(name, description, location)
## # A tibble: 7,857 x 3
## name description location
## <chr> <chr> <chr>
## 1 malbarracin "Santandereano en el exilio. Abogado, activis… "Bogotá"
## 2 AngelicaLo… "Ciudadana, senadora de Colombia 🇨🇴 Partido V… ""
## 3 psanabria "Public Policy & Management Scholar | Profeso… "Latin America"
## 4 JuanitaGoe "Representante a la Cámara por Bogotá (2018-2… "Bogotá, D.C., Co…
## 5 ErikVergel "PhD City Regional Planning @UNC Master Urban… "Bogotá, D.C., Co…
## 6 LaCiudadVe… "Promovemos ciudades y regiones más sostenibl… "América Latina"
## 7 riveraalza… "Concejal de Bogotá. Alianza Verde. Gestor Ur… "Bogotá"
## 8 Afbrico "#ElProfeDeFút⚽\n\nLas opiniones acá expresad… "Polombia"
## 9 miguelhote… "Presidente Editor Diario El Nacional" "El Nacional, Los…
## 10 Rivas_Sant… "Director/presentador de Puntos Capitales. Pa… ""
## # … with 7,847 more rows
# Columns averaged within each cluster.
cols <- c(
  "betweenness", "in_degree", "out_degree",
  "followers_count", "friends_count"
)
as_tibble(ego_network) %>%
  group_by(cluster) %>%
  summarise(across(all_of(cols), mean)) %>%
  arrange(desc(betweenness))
## # A tibble: 9 x 6
## cluster betweenness in_degree out_degree followers_count friends_count
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 20153. 54.1 51.8 1945. 1563.
## 2 3 15765. 67.4 68.7 1825. 1717.
## 3 2 13579. 42.4 40.3 1712. 1775.
## 4 4 12667. 35.2 37.9 1677. 1776.
## 5 6 12059. 54.4 54.4 2368. 1388.
## 6 5 7662. 31.4 31.2 1719. 1864.
## 7 7 6854. 65.9 60.6 1567. 1520.
## 8 11 4923. 57.6 56.1 1069. 1157.
## 9 <NA> 1640. 1.52 1.81 1603. 1427.
Dada la información anterior podemos enfocarnos en segmentos particulares de la red personal.
Por ejemplo, podemos enfocarnos exclusivamente en los usuarios que hacen parte de los grupos etiquetados con 12, 4, 2 y 6.
# Keep only the nodes of the selected clusters and recompute every
# centrality measure on the resulting subgraph.
ego_network_subset <- ego_network %>%
  filter(cluster %in% c(12, 4, 2, 6)) %>%
  mutate(
    out_degree = centrality_degree(mode = "out"),
    in_degree = centrality_degree(mode = "in"),
    betweenness = centrality_betweenness(directed = TRUE),
    authority_score = centrality_authority(),
    eigen_centrality = centrality_eigen(directed = TRUE)
  )

# Network drawing with the "mds" layout; point fill = cluster,
# point size = in-degree.
ggraph(ego_network_subset, "mds") +
  geom_edge_fan(alpha = 1/5, width = 1/5) +
  geom_node_point(
    aes(fill = cluster, size = in_degree),
    shape = 21, color = "white", show.legend = FALSE
  )

# Betweenness against in-degree, labelling the top 10 nodes on either
# measure.
ego_network_subset %>%
  as_tibble() %>%
  mutate(
    label_id = ifelse(
      rank(-betweenness) <= 10 | rank(-in_degree) <= 10,
      name,
      NA_character_
    )
  ) %>%
  ggplot(aes(betweenness, in_degree, color = cluster)) +
  geom_point() +
  ggrepel::geom_label_repel(aes(label = label_id), size = 3)

# Network drawing again, now sized by betweenness and labelling, within each
# cluster, the top 5 nodes by authority score or by betweenness.
ego_network_subset %>%
  group_by(cluster) %>%
  mutate(
    label_name = ifelse(
      rank(-authority_score) <= 5 | rank(-betweenness) <= 5,
      name,
      NA_character_
    )
  ) %>%
  ggraph("mds") +
  geom_edge_fan(alpha = 1/5, width = 1/5) +
  geom_node_point(
    aes(fill = cluster, size = betweenness),
    shape = 21, color = "white", show.legend = FALSE
  ) +
  geom_node_label(
    aes(label = label_name),
    repel = TRUE, alpha = 3/4, size = 3
  )
Esta sección repite el análisis anterior para la red personal de amigos de sergemont
# Cache folder for the friends-of-friends downloads.
outfolder <- paste0(ego, "_friends_of_friends/")
if (!dir.exists(outfolder)) {
  dir.create(outfolder)
}

# Accounts followed by the ego.
ego_friends <- get_friends(users = ego, token = sample(token, 1))
ego_friends
## # A tibble: 2,419 x 2
## user user_id
## <chr> <chr>
## 1 sergemont 66676632
## 2 sergemont 599551829
## 3 sergemont 1219662920700911616
## 4 sergemont 19128902
## 5 sergemont 3149487047
## 6 sergemont 734748619861983232
## 7 sergemont 419533496
## 8 sergemont 95654761
## 9 sergemont 1315224240304648194
## 10 sergemont 1318461879409401859
## # … with 2,409 more rows
# Users already processed are recognised by their file name in outfolder.
# The extension is stripped with an escaped, end-anchored pattern; the
# previous ".rds" pattern treated the dot as "any character" and was not
# tied to the end of the name.
users_done <- str_remove(dir(outfolder), "\\.rds$")
users_left <- setdiff(ego_friends$user_id, users_done)

# Process the pending users one at a time so the job can be interrupted and
# resumed: everything already written to outfolder is skipped on the next run.
while (length(users_left) > 0) {
  new_user <- users_left[[1]]
  # try() keeps the loop alive when a request fails; in that case the
  # "try-error" object is what gets written to disk for this user.
  friends_of_user <- try(multi_get_friends(new_user, token))
  file_name <- str_glue("{outfolder}{new_user}.rds")
  write_rds(friends_of_user, file_name, compress = "gz")
  # Drop the user that was just processed from the pending list.
  users_left <- setdiff(users_left, new_user)
}
En este caso, no se puede obtener información sobre el 0.2% de los amigos de sergemont.
# Read every cached result found in outfolder (one list element per file).
edge_list <- list.files(outfolder, full.names = TRUE) %>%
  map(read_rds)

# Flag the "try-error" elements of THIS folder and drop them with a logical
# mask. Recomputing the flags here avoids reusing an index derived from a
# different folder, and the mask keeps every element when nothing failed
# (`x[-integer(0)]` would select nothing).
is_error <- map_lgl(edge_list, ~ inherits(.x, "try-error"))
error_index <- which(is_error)
edge_list <- edge_list[!is_error] %>% bind_rows()
edge_list
## # A tibble: 3,989,460 x 2
## from to
## <chr> <chr>
## 1 1000369692278738944 19103793
## 2 1000369692278738944 980078479269089280
## 3 1000369692278738944 99378819
## 4 1000369692278738944 13876622
## 5 1000369692278738944 1123725326817873920
## 6 1000369692278738944 21577894
## 7 1000369692278738944 326619024
## 8 1000369692278738944 1002300185391710208
## 9 1000369692278738944 344103743
## 10 1000369692278738944 2347372296
## # … with 3,989,450 more rows
# Profile metadata for every friend of the ego, cached on disk.
# NOTE(review): the whole `token` object is passed here, whereas the other
# requests on this page draw one entry with sample(); confirm this is intended.
ego_friends_info <- lookup_users(users = ego_friends$user_id, token = token)
write_rds(ego_friends_info, paste0(ego, "_friends_info.rds"), compress = "gz")

# Reload the cache, drop protected accounts and keep the descriptive columns
# plus the *_count columns (minus the quote, retweet, reply and favourite
# counters).
ego_friends_info <- paste0(ego, "_friends_info.rds") %>%
  read_rds() %>%
  filter(!protected) %>%
  select(
    user_id, screen_name, lang, name, location, description,
    ends_with("count"), -starts_with("quote"),
    -starts_with("retweet"), -reply_count,
    -starts_with("fav")
  ) %>%
  # After this step `name` holds the user_id and the display name moves to
  # `user_name`.
  rename(name = user_id, user_name = name)

# Named vector: names are user ids, values are screen names.
id_dict <- deframe(select(ego_friends_info, name, screen_name))
Esta es la información que corresponde a los amigos de sergemont con mayor número de seguidores.
# Friends sorted from most to fewest followers.
ego_friends_info %>%
  select(screen_name, description, location, followers_count, friends_count) %>%
  arrange(desc(followers_count))
## # A tibble: 2,398 x 5
## screen_name description location followers_count friends_count
## <chr> <chr> <chr> <int> <int>
## 1 TheEconomist "News and analysis wit… London 25235201 127
## 2 BernieSande… "U.S. Senator for Verm… Vermont 13885863 1479
## 3 theweeknd "https://t.co/OP85ArZo… Toronto, … 13542464 1
## 4 KamalaHarris "Vice President-Elect … California 12297917 736
## 5 AOC "US Representative,NY-… Bronx + Q… 10600561 2914
## 6 guardian "The need for independ… London 9319045 1082
## 7 NewYorker "Unparalleled reportin… New York,… 8973522 373
## 8 el_pais "La mejor información … Madrid 7843092 779
## 9 AJEnglish "Hear the human story … Doha, Qat… 6513223 221
## 10 YourAnonCen… "Exposing Human Rights… in your a… 5959311 755
## # … with 2,388 more rows
# Keep only the edges whose two endpoints both appear in ego_friends_info
# (its `name` column holds the user_id).
edge_list <- filter(
  edge_list,
  to %in% ego_friends_info$name,
  from %in% ego_friends_info$name
)
edge_list
## # A tibble: 203,000 x 2
## from to
## <chr> <chr>
## 1 1000369692278738944 63044418
## 2 1000369692278738944 14700117
## 3 1000369692278738944 842242609
## 4 1000369692278738944 1161084242
## 5 1000369692278738944 111619549
## 6 1000369692278738944 80908670
## 7 1000369692278738944 14362241
## 8 1000369692278738944 24889586
## 9 1000369692278738944 200514238
## 10 1000369692278738944 3316287402
## # … with 202,990 more rows
La red personal de amigos de sergemont que pudimos reconstruir tiene 2398 usuarios con 203000 conexiones.
# Build the tbl_graph from the edge list, attach the friend metadata to the
# nodes and relabel them: `name` becomes the screen name and the original id
# is kept as `user_id`.
ego_network <- tidygraph::as_tbl_graph(edge_list) %>%
  left_join(ego_friends_info) %>%
  rename(name = screen_name, user_id = name) %>%
  select(name, everything())

## Descriptive statistics
# Add one node column per centrality measure.
ego_network <- mutate(
  ego_network,
  out_degree = centrality_degree(mode = "out"),
  in_degree = centrality_degree(mode = "in"),
  betweenness = centrality_betweenness(directed = TRUE),
  authority_score = centrality_authority(),
  eigen_centrality = centrality_eigen(directed = TRUE)
)
ego_network
## # A tbl_graph: 2394 nodes and 203000 edges
## #
## # A directed simple graph with 1 component
## #
## # Node Data: 2,394 x 15 (active)
## name user_id lang user_name location description followers_count
## <chr> <chr> <chr> <chr> <chr> <chr> <int>
## 1 TheW… 100036… en The War … "Brookl… "A podcast… 19565
## 2 digi… 100125… en Digital … "on the… "Building … 3442
## 3 Vall… 100138… es Vallekas… "Valle … "Cuenta of… 2074
## 4 UoMP… 100140… en Policy@M… "" "Influence… 11634
## 5 Sani… 100163… es Carolina… "" "«Si a mí … 110630
## 6 RapP… 100257… es RAP Pací… "" "Entidad p… 1435
## # … with 2,388 more rows, and 8 more variables: friends_count <int>,
## # listed_count <int>, statuses_count <int>, out_degree <dbl>,
## # in_degree <dbl>, betweenness <dbl>, authority_score <dbl>,
## # eigen_centrality <dbl>
## #
## # Edge Data: 203,000 x 2
## from to
## <int> <int>
## 1 1 1852
## 2 1 470
## 3 1 2127
## # … with 202,997 more rows
La siguiente gráfica muestra la influencia de cada usuario en Twitter (eje horizontal) vs la influencia de cada usuario dentro de la red personal de amigos (eje vertical)
# Scatter plot: followers on Twitter (x) against in-degree inside the
# network (y).
# (An optional `filter(in_degree > 5)` step is left disabled.)
ggplot(as_tibble(ego_network), aes(x = followers_count, y = in_degree)) +
  geom_point()

# Same plot, labelling the nodes ranked in the top 10 on either axis.
ego_network %>%
  as_tibble() %>%
  mutate(
    label_name = ifelse(
      rank(-followers_count) <= 10 | rank(-in_degree) <= 10,
      name,
      NA_character_
    )
  ) %>%
  ggplot(aes(x = followers_count, y = in_degree)) +
  geom_point() +
  ggrepel::geom_label_repel(aes(label = label_name), size = 3)
Clusters
# Walktrap community detection with random walks of length 12.
clusters <- igraph::cluster_walktrap(ego_network, steps = 12)

# One row per node with its cluster label; clusters with fewer than 10
# members are discarded.
cluster_df <- tibble(
  cluster = factor(clusters$membership),
  name = clusters$names
) %>%
  add_count(cluster) %>%
  filter(n >= 10) %>%
  select(-n)

# Nodes of discarded clusters end up with cluster = NA after the join.
ego_network <- left_join(ego_network, cluster_df)

# Word cloud, one panel per cluster, showing the 30 nodes with the highest
# authority score in each cluster.
ego_network %>%
  as_tibble() %>%
  filter(!is.na(cluster)) %>%
  arrange(desc(in_degree)) %>%
  group_by(cluster) %>%
  filter(rank(-authority_score) <= 30) %>%
  ggplot(aes(label = name, size = log(in_degree), color = in_degree)) +
  geom_text_wordcloud_area(family = "Avenir Next Condensed") +
  facet_wrap(~cluster) +
  labs(title = "Amigos prominentes en cada cluster") +
  scale_color_gradient(low = "grey", high = "purple")
Tamaño de cada cluster:
# Number of nodes per cluster.
count(as_tibble(ego_network), cluster)
## # A tibble: 4 x 2
## cluster n
## <fct> <int>
## 1 1 888
## 2 2 50
## 3 3 538
## 4 4 918
¿Quiénes son los usuarios que funcionan como “puentes”?
# Nodes sorted by decreasing betweenness.
as_tibble(ego_network) %>%
  arrange(desc(betweenness)) %>%
  select(name, description, location)
## # A tibble: 2,394 x 3
## name description location
## <chr> <chr> <chr>
## 1 ErikVergel PhD City Regional Planning @UNC Master Urban Ma… "Bogotá, D.C., …
## 2 spaceforpe… political geographer in Peace Studies at Kent S… "Kent, OH"
## 3 AngelicaLo… Ciudadana, senadora de Colombia 🇨🇴 Partido Verd… ""
## 4 malbarracin Santandereano en el exilio. Abogado, activista … "Bogotá"
## 5 antipodeon… Antipode: A Radical Journal of Geography, publi… ""
## 6 psanabria Public Policy & Management Scholar | Profesor e… "Latin America"
## 7 regstud Supporting and connecting researchers and polic… "Brighton, Engl…
## 8 UrbanTheor… This is the twitter platform of Neil Brenner, c… "Chicago"
## 9 CiderUnian… Twitter oficial del Centro Interdisciplinario d… "Bogotá - Colom…
## 10 PPS_Placem… We bring community-powered public spaces to lif… "New York, NY"
## # … with 2,384 more rows
# Columns averaged within each cluster.
cols <- c(
  "betweenness", "in_degree", "out_degree",
  "followers_count", "friends_count"
)
as_tibble(ego_network) %>%
  group_by(cluster) %>%
  summarise(across(all_of(cols), mean)) %>%
  arrange(desc(betweenness))
## # A tibble: 4 x 6
## cluster betweenness in_degree out_degree followers_count friends_count
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3 4287. 67.5 68.8 310769. 1670.
## 2 1 3568. 75.7 72.9 26351. 1393.
## 3 4 3453. 108. 109. 69698. 1724.
## 4 2 2938. 15.6 17.5 26333. 5449.
# Keep only the nodes that were assigned to a retained cluster and recompute
# every centrality measure on the resulting subgraph.
ego_network_subset <- ego_network %>%
  filter(!is.na(cluster)) %>%
  mutate(
    out_degree = centrality_degree(mode = "out"),
    in_degree = centrality_degree(mode = "in"),
    betweenness = centrality_betweenness(directed = TRUE),
    authority_score = centrality_authority(),
    eigen_centrality = centrality_eigen(directed = TRUE)
  )

# Network drawing with the "mds" layout; point fill = cluster,
# point size = in-degree.
ggraph(ego_network_subset, "mds") +
  geom_edge_fan(alpha = 1/5, width = 1/5) +
  geom_node_point(
    aes(fill = cluster, size = in_degree),
    shape = 21, color = "white", show.legend = FALSE
  )

# Betweenness against in-degree, labelling the top 10 nodes on either
# measure.
ego_network_subset %>%
  as_tibble() %>%
  mutate(
    label_id = ifelse(
      rank(-betweenness) <= 10 | rank(-in_degree) <= 10,
      name,
      NA_character_
    )
  ) %>%
  ggplot(aes(betweenness, in_degree, color = cluster)) +
  geom_point() +
  ggrepel::geom_label_repel(aes(label = label_id), size = 3)

# Network drawing again, now sized by betweenness and labelling, within each
# cluster, the top 5 nodes by authority score or by betweenness.
ego_network_subset %>%
  group_by(cluster) %>%
  mutate(
    label_name = ifelse(
      rank(-authority_score) <= 5 | rank(-betweenness) <= 5,
      name,
      NA_character_
    )
  ) %>%
  ggraph("mds") +
  geom_edge_fan(alpha = 1/5, width = 1/5) +
  geom_node_point(
    aes(fill = cluster, size = betweenness),
    shape = 21, color = "white", show.legend = FALSE
  ) +
  geom_node_label(
    aes(label = label_name),
    repel = TRUE, alpha = 3/4, size = 3
  )
# Re-read the cached friends-of-friends results (one list element per file).
edge_list <- list.files(paste0(ego, "_friends_of_friends/"), full.names = TRUE) %>%
  map(read_rds)

# Flag failed downloads ("try-error" objects) and drop them with a logical
# mask. inherits() is used for the class test because class() can return
# several values, and the mask keeps every element when nothing failed
# (`edge_list[-integer(0)]` would select nothing).
is_error <- map_lgl(edge_list, ~ inherits(.x, "try-error"))
error_index <- which(is_error)
edge_list <- edge_list[!is_error] %>% bind_rows()

# Reciprocated edges: join the edge list with its own reversed copy, then
# keep the pairs whose two endpoints are both followers and friends of the
# ego. The last filter keeps only ids that appear in both columns.
edge_list_mutual <- inner_join(
  edge_list,
  edge_list %>% rename(from = to, to = from)
) %>%
  filter(from %in% ego_followers$user_id, to %in% ego_followers$user_id) %>%
  filter(from %in% ego_friends$user_id, to %in% ego_friends$user_id) %>%
  filter(from %in% to, to %in% from)

# Dense adjacency matrix with a 1 for every reciprocated pair.
mat <- edge_list_mutual %>%
  mutate(n = 1) %>%
  tidytext::cast_sparse(from, to, n) %>%
  as.matrix()

# Reorder the rows so they follow the same id order as the columns.
mat <- mat[colnames(mat), ]

# Undirected graph built from the adjacency matrix.
mutual_network <- mat %>%
  graph_from_adjacency_matrix(mode = "undirected") %>%
  tidygraph::as_tbl_graph()
mutual_network
## # A tbl_graph: 1053 nodes and 18582 edges
## #
## # An undirected simple graph with 1 component
## #
## # Node Data: 1,053 x 1 (active)
## name
## <chr>
## 1 1890827274
## 2 753376280
## 3 147705809
## 4 1113640592842600448
## 5 12542002
## 6 718862377941397504
## # … with 1,047 more rows
## #
## # Edge Data: 18,582 x 2
## from to
## <int> <int>
## 1 1 8
## 2 1 14
## 3 1 17
## # … with 18,579 more rows
# Profile metadata for every node of the mutual network. A single token is
# drawn at random, as in the other requests on this page; the previous call
# had the closing parenthesis misplaced (`sample(token), 1`), which shuffled
# the whole token list and passed `1` to lookup_users() as an extra
# positional argument.
ego_mutuals_info <- lookup_users(
  as_tibble(mutual_network)$name,
  token = sample(token, 1)
)

# Drop protected accounts and keep the descriptive columns plus the *_count
# columns (minus the quote, retweet, reply and favourite counters).
ego_mutuals_info <- ego_mutuals_info %>%
  filter(!protected) %>%
  select(
    user_id, screen_name, lang, name, location, description,
    ends_with("count"), -starts_with("quote"),
    -starts_with("retweet"), -reply_count,
    -starts_with("fav")
  ) %>%
  # After this step `name` holds the user_id and the display name moves to
  # `user_name`.
  rename(name = user_id, user_name = name)

# Keep only the nodes that have metadata and relabel them: `name` becomes
# the screen name and the original id is kept as `user_id`.
mutual_network <- mutual_network %>%
  inner_join(ego_mutuals_info) %>%
  rename(name = screen_name, user_id = name) %>%
  select(name, everything())

## Descriptive statistics
# Add one node column per centrality measure.
mutual_network <- mutual_network %>%
  mutate(
    degree = centrality_degree(),
    betweenness = centrality_betweenness(directed = TRUE),
    authority_score = centrality_authority(),
    eigen_centrality = centrality_eigen(directed = TRUE)
  )
La siguiente gráfica muestra la influencia de cada usuario en Twitter (eje horizontal) vs la influencia de cada usuario dentro de la red personal de conexiones recíprocas (eje vertical)
# Scatter plot: followers on Twitter (x) against degree inside the mutual
# network (y).
ggplot(as_tibble(mutual_network), aes(x = followers_count, y = degree)) +
  geom_point()

# Same plot, labelling the nodes ranked in the top 15 on either axis.
mutual_network %>%
  as_tibble() %>%
  mutate(
    label_name = ifelse(
      rank(-followers_count) <= 15 | rank(-degree) <= 15,
      name,
      NA_character_
    )
  ) %>%
  ggplot(aes(x = followers_count, y = degree)) +
  geom_point() +
  ggrepel::geom_label_repel(aes(label = label_name), size = 3)
Clusters
# Fix the random seed before clustering, as done for the follower network,
# so that the cluster labels referenced in the tables and plots below are
# the same on every run.
set.seed(123)

# Louvain community detection on the mutual network.
clusters <- igraph::cluster_louvain(graph = mutual_network)

# One row per node with its cluster label; clusters with fewer than 10
# members are discarded.
cluster_df <- tibble(cluster = factor(clusters$membership), name = clusters$names)
cluster_df <- cluster_df %>%
  group_by(cluster) %>%
  filter(n() >= 10) %>%
  ungroup()

# Nodes of discarded clusters end up with cluster = NA after the join.
mutual_network <- mutual_network %>%
  left_join(cluster_df)

# Word cloud, one panel per cluster, showing the 30 nodes with the highest
# authority score in each cluster.
mutual_network %>%
  as_tibble() %>%
  arrange(desc(degree)) %>%
  filter(!is.na(cluster)) %>%
  group_by(cluster) %>%
  filter(rank(-authority_score) <= 30) %>%
  ggplot(aes(label = name, size = log(degree), color = degree)) +
  geom_text_wordcloud_area(family = "Avenir Next Condensed") +
  facet_wrap(~cluster) +
  labs(title = "Usuarios prominentes en cada cluster") +
  scale_color_gradient(low = "grey", high = "purple")
Tamaño de cada cluster:
# Number of nodes per cluster.
count(as_tibble(mutual_network), cluster)
## # A tibble: 7 x 2
## cluster n
## <fct> <int>
## 1 1 79
## 2 2 40
## 3 3 170
## 4 4 140
## 5 5 376
## 6 6 235
## 7 7 13
¿Quiénes son los usuarios que funcionan como “puentes”?
# Nodes sorted by decreasing betweenness (all columns kept).
arrange(as_tibble(mutual_network), desc(betweenness))
## # A tibble: 1,053 x 15
## name user_id lang user_name location description followers_count
## <chr> <chr> <chr> <chr> <chr> <chr> <int>
## 1 Erik… 101526… es C. Erik … "Bogotá… PhD City R… 2585
## 2 psan… 527578… es Pablo Sa… "Latin … Public Pol… 5891
## 3 spac… 152072… en Sara Koo… "Kent, … political … 8399
## 4 malb… 482533… es Mauricio… "Bogotá" Santandere… 39675
## 5 JGar… 585999… es JavierGa… "" Profesor C… 2130
## 6 RAKa… 572136… en Robert A… "Tierra… Colombia p… 9473
## 7 anti… 310297… en Antipode "" Antipode: … 19482
## 8 Ange… 373413… es Angélica… "" Ciudadana,… 464194
## 9 regs… 590838… en Regional… "Bright… Supporting… 8423
## 10 ethe… 296281… es Ethel Se… "London… MPhil/PhD … 847
## # … with 1,043 more rows, and 8 more variables: friends_count <int>,
## # listed_count <int>, statuses_count <int>, degree <dbl>, betweenness <dbl>,
## # authority_score <dbl>, eigen_centrality <dbl>, cluster <fct>
# Columns averaged within each cluster.
cols <- c(
  "betweenness", "degree",
  "followers_count", "friends_count"
)
as_tibble(mutual_network) %>%
  group_by(cluster) %>%
  summarise(across(all_of(cols), mean)) %>%
  arrange(desc(betweenness))
## # A tibble: 7 x 5
## cluster betweenness degree followers_count friends_count
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 7 967. 4.92 15119. 9400.
## 2 3 928. 28.7 3513. 1641.
## 3 5 903. 39.7 14591. 1992.
## 4 6 852. 45.1 2274. 1539.
## 5 4 805. 23.4 1961. 694.
## 6 1 658. 34.9 1435. 1106.
## 7 2 551. 17.0 5194. 2861.
# Keep only the nodes that were assigned to a retained cluster and recompute
# every centrality measure on the resulting subgraph.
mutual_network_subset <- mutual_network %>%
  filter(!is.na(cluster)) %>%
  mutate(
    degree = centrality_degree(),
    betweenness = centrality_betweenness(directed = TRUE),
    authority_score = centrality_authority(),
    eigen_centrality = centrality_eigen(directed = TRUE)
  )

# Network drawing with the "mds" layout; point fill = cluster,
# point size = degree.
ggraph(mutual_network_subset, "mds") +
  geom_edge_fan(alpha = 1/5, width = 1/5) +
  geom_node_point(
    aes(fill = cluster, size = degree),
    shape = 21, color = "white", show.legend = FALSE
  )

# Betweenness against degree, labelling the top 10 nodes on either measure.
mutual_network_subset %>%
  as_tibble() %>%
  mutate(
    label_id = ifelse(
      rank(-betweenness) <= 10 | rank(-degree) <= 10,
      name,
      NA_character_
    )
  ) %>%
  ggplot(aes(betweenness, degree, color = cluster)) +
  geom_point() +
  ggrepel::geom_label_repel(aes(label = label_id), size = 3)

# Network drawing again, now sized by betweenness and labelling, within each
# cluster, the top 5 nodes by degree or by betweenness.
mutual_network_subset %>%
  group_by(cluster) %>%
  mutate(
    label_name = ifelse(
      rank(-degree) <= 5 | rank(-betweenness) <= 5,
      name,
      NA_character_
    )
  ) %>%
  ggraph("mds") +
  geom_edge_fan(alpha = 1/5, width = 1/5) +
  geom_node_point(
    aes(fill = cluster, size = betweenness),
    shape = 21, color = "white", show.legend = FALSE
  ) +
  geom_node_label(
    aes(label = label_name),
    repel = TRUE, alpha = 3/4, size = 3
  )
# Echo the source of the helper file to the output.
writeLines(readLines("rtweet_functions.R"))
##
## # main functions ----------------------------------------------------------
##
## multi_get_friends <- function(u, token_list) {
##
## user_info <- lookup_users(u, token = sample(token_list, 1)[[1]])
## fc <- user_info$friends_count
## message("<<", user_info$screen_name, ">> is following ", scales::comma(fc), " users ")
##
## if (user_info$protected) stop(call. = FALSE, "The account is protected, we can't get followers.")
##
## num_queries <- ceiling(fc / 5000)
## rl <- rate_limit(token_list, "get_friends")
## rl <- validate_rate_limit(rl, "get_friends", token_list)
##
## index <- get_available_token_index(rl)
##
## # Case 0: User doesn't have any friends
##
## if (fc == 0) return(tibble(from = character(0), to = character(0)))
##
## # Case 1: Less than 5,000 friends, only call is needed
##
## if (fc <= 5e3) {
##
## friends <- get_friends(u, token = token_list[[index]])
##
## } else {
##
## # Case 2: Many calls are needed
##
## output <- vector("list", length = num_queries)
## output[[1]] <- get_friends(u, token = token_list[[index]])
##
## for (i in 2:length(output)) {
##
## rl <- validate_rate_limit(rl, "get_friends", token_list)
## index <- get_available_token_index(rl)
## output[[i]] <- get_friends(u, token = token_list[[index]], page = next_cursor(output[[i - 1]]))
##
## }
##
## friends <- bind_rows(output) %>%
## distinct()
##
## }
##
## attr(friends, "next_cursor") <- NULL
##
## friends %>%
## rename(from = user, to = user_id) %>%
## mutate(from = user_info$user_id)
##
## }
##
## multi_get_timeline <- function(u, n, token_list, home = FALSE) {
##
## message(u)
## rl <- rate_limit(token_list, "get_timeline")
## rl <- validate_rate_limit(rl, "get_timeline", token_list)
##
## index <- get_available_token_index(rl)
##
## # Case 0: User doesn't have any posts
##
## # what to do?
##
## # Should we allow to get all the timeline??? If so, mimic previous function
##
## tl <- get_timeline(u, n = n, home = home, token = token_list[[index]])
##
## return(tl)
##
## }
##
## # multi_lookup_users <- function() {
## #
## #
## # }
##
##
## # helpers -----------------------------------------------------------------
##
## validate_rate_limit <- function(rl, q, token_list) {
##
## if (is_empty(rl)) {
## message("Waiting for rate limiting update")
## Sys.sleep(60)
## rl <- rate_limit(token_list, query = q)
## validate_rate_limit(rl, q, token_list) # recursion!
##
## }
##
## if (all(rl$remaining == 0)) {
##
## message("Waiting for token reset in ", round(min(rl$reset), 1), " minutes")
## Sys.sleep(min(as.numeric(rl$reset_at - Sys.time(), units = "secs")) + 5)
## rl <- rate_limit(token_list, query = q)
## validate_rate_limit(rl, q, token_list) # recursion!
##
## }
##
## rl
##
## }
##
## get_available_token_index <- function(rl) {
##
## env <- rlang::caller_env()
## available_token <- rl$remaining > 0
## index <- which(available_token)[[1]]
## env$rl[index, ]$remaining <- rl[index, ]$remaining - 1 # this modifies the rl obj in the parent frame
## return(index)
##
## }
# Print the definition of theme_custom (the theme applied with theme_set()
# at the top of the page).
theme_custom
## function (base_family = "Avenir Next Condensed", fill = "white", ...) {
## theme_minimal(base_family = base_family, ...) %+replace%
## theme(plot.title = element_text(face = "bold", margin = margin(0,
## 0, 5, 0), hjust = 0, size = 13), plot.subtitle = element_text(face = "italic",
## margin = margin(0, 0, 5, 0), hjust = 0), plot.background = element_rect(fill = fill,
## size = 0), complete = TRUE, axis.title.x = element_text(margin = margin(15,
## 0, 0, 0)), axis.title.y = element_text(angle = 90,
## margin = margin(0, 20, 0, 0)), strip.text = element_text(face = "italic",
## colour = "white"), strip.background = element_rect(fill = "#4C4C4C"))
## }